# Install pacman only when it is missing: reinstalling a package that is
# already loaded in the session fails with "Updating loaded packages".
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}
Error in install.packages : Updating loaded packages
# Attach pacman to the search path.
library("pacman")
Installing Packages
# Load the four packages used by this analysis through pacman, in the same
# order as before (rule mining, rule visualisation, plotting, colour tools).
pacman::p_load(char = c("arules", "arulesViz", "ggplot2", "colorspace"))
Loading packages
# Read the file into an arules `transactions` object. Items within a
# transaction are separated by a single space, despite the .csv extension.
dataset <- read.transactions(file = "./AssociationRules.csv", sep = " ")
Read the transactions (lists of items) from the CSV file
# Print the internal structure (slots) of the transactions object.
utils::str(object = dataset)
Formal class 'transactions' [package "arules"] with 3 slots
..@ data :Formal class 'ngCMatrix' [package "Matrix"] with 5 slots
.. .. ..@ i : int [1:98063] 5 20 22 30 34 44 53 5 22 23 ...
.. .. ..@ p : int [1:10001] 0 7 19 26 40 48 62 75 86 93 ...
.. .. ..@ Dim : int [1:2] 98 10000
.. .. ..@ Dimnames:List of 2
.. .. .. ..$ : NULL
.. .. .. ..$ : NULL
.. .. ..@ factors : list()
..@ itemInfo :'data.frame': 98 obs. of 1 variable:
.. ..$ labels: chr [1:98] "item1" "item10" "item100" "item11" ...
..@ itemsetInfo:'data.frame': 0 obs. of 0 variables
Show the structure of the transactions object
# Five-number summary (plus mean) of the number of items per transaction.
summary(object = arules::size(dataset))
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.000 7.000 10.000 9.806 12.000 25.000
# Bar chart of item frequencies, limited to the 10 most frequent items.
arules::itemFrequencyPlot(x = dataset, topN = 10)
Plot the frequencies of the 10 most frequent items
# Seed the random number generator so that later random steps (e.g. sampling
# rules) are reproducible. This must be a call: `set.seed = 220` would only
# create a numeric variable named `set.seed` and leave the RNG unseeded.
set.seed(220)
# Mine association rules with Apriori: keep itemsets with support of at least
# 0.01 and apply no confidence filter at mining time (minimum confidence 0).
associa_rules <- apriori(
  data = dataset,
  parameter = list(support = 0.01, confidence = 0)
)
# Overview of the mined rule set: rule lengths and quality measures.
summary(object = associa_rules)
set of 11524 rules
rule length distribution (lhs + rhs):sizes
1 2 3 4 5
89 2952 7206 1272 5
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.00 2.00 3.00 2.84 3.00 5.00
summary of quality measures:
support confidence coverage lift count
Min. :0.01000 Min. :0.0108 Min. :0.01000 Min. : 0.6717 Min. : 100.0
1st Qu.:0.01150 1st Qu.:0.1683 1st Qu.:0.03940 1st Qu.: 1.0076 1st Qu.: 115.0
Median :0.01400 Median :0.2490 Median :0.06330 Median : 1.1244 Median : 140.0
Mean :0.01891 Mean :0.2788 Mean :0.09342 Mean : 1.2284 Mean : 189.1
3rd Qu.:0.01990 3rd Qu.:0.3596 3rd Qu.:0.11040 3rd Qu.: 1.2803 3rd Qu.: 199.0
Max. :0.49480 Max. :1.0000 Max. :1.00000 Max. :19.4205 Max. :4948.0
mining info:
# Keep only the rules whose confidence is at least 0.5, then summarise them.
trustly_subset <- subset(associa_rules, subset = confidence >= 0.5)
summary(object = trustly_subset)
set of 1165 rules
rule length distribution (lhs + rhs):sizes
2 3 4 5
62 753 348 2
Min. 1st Qu. Median Mean 3rd Qu. Max.
2.000 3.000 3.000 3.249 4.000 5.000
summary of quality measures:
support confidence coverage lift count
Min. :0.01000 Min. :0.5000 Min. :0.0100 Min. : 1.011 Min. : 100.0
1st Qu.:0.01110 1st Qu.:0.5283 1st Qu.:0.0193 1st Qu.: 1.093 1st Qu.: 111.0
Median :0.01350 Median :0.5644 Median :0.0237 Median : 1.192 Median : 135.0
Mean :0.01816 Mean :0.5863 Mean :0.0321 Mean : 1.589 Mean : 181.6
3rd Qu.:0.01920 3rd Qu.:0.6143 3rd Qu.:0.0338 3rd Qu.: 1.424 3rd Qu.: 192.0
Max. :0.18770 Max. :1.0000 Max. :0.3699 Max. :19.420 Max. :1877.0
mining info:
# Rules with confidence of at least 0.5. The comparison is inclusive so this
# object matches the subset summarised earlier in the script (which used
# `>=`); a strict `>` here silently replaced it with a different rule set.
trustly_subset <- subset(associa_rules, confidence >= 0.5)

# A random sample of 10 rules (unranked; the result depends on the RNG state)
sample_top_rules <- sample(associa_rules, 10)

# All rules ordered by decreasing confidence
rules_conf <- sort(associa_rules, by = "confidence", decreasing = TRUE)

# All rules ordered by decreasing lift
rules_lift <- sort(associa_rules, by = "lift", decreasing = TRUE)

# Top 10 rules by confidence
top_rules_conf <- head(rules_conf, n = 10)

# Top 10 rules by lift
top_rules_lift <- head(rules_lift, n = 10)
# Scatter plot of all rules: support vs. confidence, shaded by lift.
plot(
  x = associa_rules,
  method = "scatterplot",
  measure = c("support", "confidence"),
  shading = "lift"
)

# Scatter plot of all rules: support vs. lift, shaded by confidence.
plot(
  x = associa_rules,
  method = "scatterplot",
  measure = c("support", "lift"),
  shading = "confidence"
)
Scatter plot of all rules
# Interactive (htmlwidget) scatter plot: support vs. confidence, shaded by
# lift.
plot(
  x = associa_rules,
  method = "scatterplot",
  engine = "htmlwidget",
  measure = c("support", "confidence"),
  shading = "lift"
)
plot: Too many rules supplied. Only plotting the best 1000 rules using measure lift (change parameter max if needed)To reduce overplotting, jitter is added! Use jitter = 0 to prevent jitter.
`arrange_()` is deprecated as of dplyr 0.7.0.
Please use `arrange()` instead.
See vignette('programming') for more help
This warning is displayed once every 8 hours.
Call `lifecycle::last_warnings()` to see where this warning was generated.
# Interactive (htmlwidget) scatter plot: support vs. lift, shaded by
# confidence.
plot(
  x = associa_rules,
  method = "scatterplot",
  engine = "htmlwidget",
  measure = c("support", "lift"),
  shading = "confidence"
)
plot: Too many rules supplied. Only plotting the best 1000 rules using measure confidence (change parameter max if needed)To reduce overplotting, jitter is added! Use jitter = 0 to prevent jitter.
# Parallel-coordinates plots of the two top-10 rule sets.
plot(x = top_rules_conf, method = "paracoord")
plot(x = top_rules_lift, method = "paracoord")

# Matrix plot of the top-10-by-confidence rules, coloured by confidence.
plot(x = top_rules_conf, method = "matrix", measure = "confidence")
Itemsets in Antecedent (LHS)
[1] "{item15,item30,item49}" "{item30,item49,item56}" "{item49,item56}" "{item15,item49,item56}" "{item49,item56,item84}" "{item15,item49,item84}"
[7] "{item15,item56,item77}"
Itemsets in Consequent (RHS)
[1] "{item30}" "{item84}" "{item15}" "{item56}"
Matrix based plot
# Matrix plot of the top-10-by-lift rules, coloured by lift.
plot(x = top_rules_lift, method = "matrix", measure = "lift")
Itemsets in Antecedent (LHS)
[1] "{item15,item30,item56}" "{item30,item56,item84}" "{item15,item30,item49}" "{item15,item56}" "{item15,item49}" "{item30,item49,item84}"
[7] "{item56,item84}" "{item15,item30,item84}" "{item15,item30,item77}" "{item30,item77,item84}"
Itemsets in Consequent (RHS)
[1] "{item56}" "{item49}"
# Grouped view of the top-10-by-lift rules.
plot(x = top_rules_lift, method = "grouped")

# Interactive graph view of the top-10-by-confidence rules.
plot(x = top_rules_conf, method = "graph", engine = "htmlwidget")

# Static graph view restricted to the three rules with the highest lift.
top_3_lift <- head(top_rules_lift, n = 3)
plot(x = top_3_lift, method = "graph")

# Rules with confidence strictly above 0.8; keep the 38 most confident ones.
hight_trustly_rules <- subset(associa_rules, subset = confidence > 0.8)
first_38_conf <- head(
  sort(hight_trustly_rules, by = "confidence", decreasing = TRUE),
  n = 38
)

# Matrix plot of those rules using both lift and confidence, with the
# reordering of rows/columns switched off.
plot(
  x = first_38_conf,
  method = "matrix",
  measure = c("lift", "confidence"),
  shading = c("lift", "confidence"),
  control = list(reorder = FALSE)
)
Itemsets in Antecedent (LHS)
[1] "{item15,item49,item56}" "{item49,item56,item84}" "{item49,item56}" "{item15,item49,item84}" "{item30,item49,item56}" "{item15,item30,item49}"
[7] "{item15,item56,item77}" "{item15,item56,item84}" "{item49,item77,item84}" "{item16,item61,item77}" "{item20,item23}" "{item16,item34,item77}"
[13] "{item15,item49}" "{item5,item82,item99}" "{item3,item84,item95}" "{item13,item82,item99}" "{item23}" "{item25,item34,item77}"
[19] "{item82,item99}" "{item22,item3,item41}" "{item55}" "{item10,item44}" "{item83}" "{item23,item5}"
[25] "{item30,item56,item77}" "{item15,item30,item56}" "{item10,item22,item41}" "{item30,item49,item84}" "{item20,item25,item41}" "{item16,item25,item77}"
[31] "{item30,item95,item96}"
Itemsets in Consequent (RHS)
[1] "{item30}" "{item15}" "{item56}" "{item84}" "{item5}" "{item13}" "{item10}" "{item34}" "{item3}" "{item77}" "{item92}"